This script plots individual antibody trajectories in the Asembo, Kenya cohort from enrollment to follow-up. 205 children were measured at both timepoints.
## here() starts at /Users/benarnold/enterics-seroepi
## [1] "/Users/benarnold/enterics-seroepi"
## ── Attaching packages ────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ───────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
Identify children whose status changed between enrollment and follow-up. Those who changed from negative to positive are seroconverters (seroi below), and those who changed from positive to negative are seroreverters (seror below).
#-----------------------------
# identify incident
# seroconversions and reversions
# based on crossing the
# seropositivity cutoff
# and
# based on a 4-fold change in MFI
# to above the cutoff (seroconversion)
# or starting above cutoff (seroreversion)
#-----------------------------
dlp <- dl %>%
group_by(antigen,antigenf,childid) %>%
mutate(seroposA=ifelse(time=="A",seropos,NA),
seroposA=max(seroposA,na.rm=TRUE),
seroposB=ifelse(time=="B",seropos,NA),
seroposB=max(seroposB,na.rm=TRUE),
seroi=ifelse(seroposB==1 & seroposA==0,1,0),
seror=ifelse(seroposB==0 & seroposA==1,1,0),
mfiA=ifelse(time=="A",logmfi,NA),
mfiA=max(mfiA,na.rm=TRUE),
mfiB=ifelse(time=="B",logmfi,NA),
mfiB=max(mfiB,na.rm=TRUE),
seroi4fold=ifelse(logmfidiff>log10(4) & mfiB>serocut,1,0),
seror4fold=ifelse(logmfidiff< - log10(4) & mfiA>serocut,1,0)
) Longitudinal antibody trajectories of individual children for all antigens measured, colored by the change in antibody levels (increases and decreases)
#-----------------------------
# Plot individual antibody
# trajectories, colored
# by increases and decreases
#-----------------------------
# create an change category factor for plot aesthetics
dlp <- dlp %>%
mutate(mficat=factor(ifelse(logmfidiff>0,"Increase","Decrease"),levels=c("Increase","Decrease")))
# convert time to numeric (for plotting)
dlp <- dlp %>%
mutate(svy=ifelse(time=="A",0,1))
log10labs <- c(
expression(10^0),
expression(10^1),
expression(10^2),
expression(10^3),
expression(10^4)
)
ppq <- ggplot(data=filter(dlp,!is.na(mficat)),aes(x=svy,y=logmfi,group=factor(childid),color=mficat)) +
# geom_point(alpha=0.2) +
geom_line(alpha=0.2) +
# geom_hline(aes(yintercept=mixcut),linetype="dashed") +
scale_x_continuous(limits=c(-0.25,1.25),breaks=c(0,1),labels=c("Enrollment","Follow-up"))+
scale_y_continuous(breaks=0:4,labels=log10labs) +
scale_color_manual(values=c(corange,cteal,"gray80"),
guide=guide_legend(title="MFI change:",override.aes = list(alpha=1))
) +
facet_wrap(~antigenf,nrow=6,ncol=2) +
labs(x="Measurement",y="Luminex response (MFI-bg)") +
theme_minimal() +
theme(
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
axis.ticks.y=element_line(color="gray40"),
legend.position = "top"
)
ppqCreate the same figure of individual antibody trajectories between measurements, but limit it to antigens with seroincidence measures, and color the trajectories by seroconversion/reversion status.
#-----------------------------
# pull the incidence info
# and just merge it back
# to the full longidudinal data
#-----------------------------
dl2 <- dlp
# create an incidence category factor for plot aesthetics
# for seroconversion based on seropositivity only
dl2$icat <- factor(NA,levels=c("Seroconversion","Seroreversion","No change"))
dl2$icat[dl2$seroi==1] <- "Seroconversion"
dl2$icat[dl2$seror==1] <- "Seroreversion"
dl2$icat[dl2$seroi==0 & dl2$seror==0] <- "No change"
# create an incidence category factor for plot aesthetics
# for seroconversion based on 4-fold changes in MFI
dl2$i4cat <- factor(NA,levels=c("Seroconversion","Seroreversion","No change"))
dl2$i4cat[dl2$seroi4fold==1] <- "Seroconversion"
dl2$i4cat[dl2$seror4fold==1] <- "Seroreversion"
dl2$i4cat[dl2$seroi4fold==0 & dl2$seror==0] <- "No change"
# create an incidence category factor for plot aesthetics
# for seroconversion comparing both methods
dl2$icat_comp <- factor(NA,levels=c(">4-fold increase, across cutoff",">4-fold decrease",">4-fold increase, above cutoff","<4-fold change"))
dl2$icat_comp[dl2$seroi4fold==1 & dl2$seroi==1] <- ">4-fold increase, across cutoff"
dl2$icat_comp[dl2$seror4fold==1] <- ">4-fold decrease"
dl2$icat_comp[dl2$seroi4fold==1 & dl2$seroi==0] <- ">4-fold increase, above cutoff"
dl2$icat_comp[is.na(dl2$icat_comp)] <- "<4-fold change"
dl2 <- dl2 %>%
mutate(icat_comp = factor(icat_comp,levels=c(">4-fold increase, across cutoff",
">4-fold increase, above cutoff",
">4-fold decrease",
"<4-fold change"))
)
pp <- ggplot(data=filter(dl2,!is.na(icat)),aes(x=svy,y=logmfi,group=factor(childid),color=icat_comp)) +
# geom_point(alpha=0.2) +
geom_hline(aes(yintercept=serocut),linetype="dashed",size=0.3) +
geom_line(alpha=0.2) +
scale_x_continuous(limits=c(-0.25,1.25),breaks=c(0,1),labels=c("Enrollment","Follow-up"))+
scale_y_continuous(breaks=0:4,labels=log10labs) +
scale_color_manual(values=c(corange,cmagent,cteal,"gray80"),
guide=guide_legend(title="MFI change:", override.aes = list(alpha=1), ncol=2,nrow=2),
) +
facet_wrap(~antigenf,nrow=6,ncol=2) +
labs(x="Measurement",y="Luminex response (MFI-bg)") +
theme_minimal() +
theme(
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
axis.ticks.y=element_line(color="gray40"),
legend.position = "top"
)
pp#-----------------------------
# final figure, excluding cholera
# due to cross-reactivity with ETEC
#-----------------------------
relev <- levels(dl2$antigenf)[c(1:6,11,7,9,10)]
dl3 <- dl2 %>%
filter(!is.na(icat) & antigen!="cholera") %>%
ungroup() %>%
mutate(antigenf=factor(antigenf,levels=relev))
pp2 <- ggplot(data=dl3,aes(x=svy,y=logmfi,group=factor(childid),color=icat_comp)) +
# geom_point(alpha=0.2) +
geom_hline(aes(yintercept=serocut),linetype="dashed",size=0.3) +
geom_line(alpha=0.2) +
scale_x_continuous(limits=c(-0.25,1.25),breaks=c(0,1),labels=c("Enrollment","Follow-up"))+
scale_y_continuous(breaks=0:4,labels=log10labs) +
scale_color_manual(values=c(corange,cmagent,cteal,"gray80"),
guide=guide_legend(title="MFI change:", override.aes = list(alpha=1), ncol=2,nrow=2),
) +
facet_wrap(~antigenf,nrow=6,ncol=2) +
labs(x="Measurement",y="Luminex response (MFI-bg)") +
theme_minimal() +
theme(
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
axis.ticks.y=element_line(color="gray40"),
legend.position = "top"
)
pp2Estimate prevalence of each category
dl4 <- dl3 %>%
group_by(antigenf,icat_comp, .drop=FALSE) %>%
tally() %>%
group_by(antigenf) %>%
mutate(N=sum(n),
q=N-n,
prev=n/N
)
# exact binomial confidence intervals on prevalence estimates
ci_lb <- apply(dl4[c("n","q")],1,function(x) binom.test(x,alternative="two.sided")$conf.int[1])
ci_ub <- apply(dl4[c("n","q")],1,function(x) binom.test(x,alternative="two.sided")$conf.int[2])
dl4 <- dl4 %>%
bind_cols(data.frame(ci_lb,ci_ub)) %>%
arrange(antigenf,icat_comp)# compare seroconversion classification prevalences by measurement
sero_comp_p <- ggplot(data=dl4, aes(x=icat_comp, y=prev,color=icat_comp) ) +
facet_wrap(~antigenf,nrow=6,ncol=2)+
geom_errorbar(aes(ymin=ci_lb,ymax=ci_ub),width=0.1)+
geom_point()+
scale_color_manual(values=c(corange,cmagent,cteal,"gray50"),
guide=guide_legend(title="MFI change:",
override.aes = list(alpha=1),
nrow=2,ncol=2
))+
xlab("From Enrollment to Follow-up") +
ylab("Percentage of children (%)") +
coord_cartesian(ylim=c(0,1),xlim=c(1,4))+
scale_y_continuous(breaks=seq(0,1,by=0.2),labels=sprintf("%1.0f",seq(0,1,by=0.2)*100)) +
# scale_x_continuous(breaks=1:5) +
theme_minimal() +
theme(
legend.position="top",
panel.grid.minor.x=element_blank(),
axis.text.x = element_blank()
)
sero_comp_p## R version 3.5.3 (2019-03-11)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] forcats_0.3.0 stringr_1.4.0 dplyr_0.8.0.1 purrr_0.3.2
## [5] readr_1.1.1 tidyr_0.8.3 tibble_2.1.1 ggplot2_3.1.1
## [9] tidyverse_1.2.1 here_0.1
##
## loaded via a namespace (and not attached):
## [1] tidyselect_0.2.5 xfun_0.6 reshape2_1.4.3 haven_2.1.0
## [5] lattice_0.20-38 colorspace_1.3-2 htmltools_0.3.6 yaml_2.2.0
## [9] rlang_0.3.4 pillar_1.4.0 foreign_0.8-71 glue_1.3.1
## [13] withr_2.1.2 modelr_0.1.2 readxl_1.1.0 plyr_1.8.4
## [17] munsell_0.5.0 gtable_0.3.0 cellranger_1.1.0 rvest_0.3.2
## [21] psych_1.8.4 evaluate_0.13 knitr_1.22 parallel_3.5.3
## [25] broom_0.4.4 Rcpp_1.0.1 backports_1.1.4 scales_1.0.0
## [29] jsonlite_1.6 mnormt_1.5-5 hms_0.4.2 digest_0.6.18
## [33] stringi_1.4.3 grid_3.5.3 rprojroot_1.3-2 cli_1.1.0
## [37] tools_3.5.3 magrittr_1.5 lazyeval_0.2.2 crayon_1.3.4
## [41] pkgconfig_2.0.2 xml2_1.2.0 lubridate_1.7.4 assertthat_0.2.1
## [45] rmarkdown_1.12 httr_1.4.0 rstudioapi_0.9.0 R6_2.4.0
## [49] nlme_3.1-137 compiler_3.5.3